# from my_py_toolkit.file.file_toolkit import *
import json
from tqdm import tqdm
def readjson(file_path):
    """Parse the UTF-8 encoded JSON file at ``file_path`` and return its content.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        The deserialized JSON document (whatever ``json.load`` produces).
    """
    with open(file_path, mode="r", encoding="utf-8") as handle:
        content = json.load(handle)
    return content
  

def writejsonl(datas, file_path):
    """Write each item of ``datas`` as one JSON line to ``file_path``.

    The file is opened in write mode with UTF-8 encoding (existing content is
    replaced) and non-ASCII characters are written as-is rather than escaped.

    Args:
        datas: Iterable of JSON-serializable objects.
        file_path: Destination path of the JSON Lines file.
    """
    with open(file_path, 'w', encoding='utf-8') as sink:
        sink.writelines(
            json.dumps(record, ensure_ascii=False) + '\n' for record in datas
        )

# Sample Label Studio JSON export (the input format read by cvt_ls2uie):
"""
[{"id":2,"annotations":[{"id":2,"completed_by":1,"result":[{"value":{"start":2,"end":4,"text":"胸闷","labels":["主诉_症状名称"]},"id":"a7672120435811efa066e1dc31897bd1","from_name":"label","to_name":"text","type":"labels","origin":"prediction"},{"value":{"start":4,"end":7,"text":"21年","labels":["主诉_症状_总病程"]},"id":"a7672121435811efa066e1dc31897bd1","from_name":"label","to_name":"text","type":"labels","origin":"prediction-changed"},{"value":{"start":10,"end":12,"text":"1周","labels":["主诉_症状_本次发病时长"]},"id":"a7672122435811efa066e1dc31897bd1","from_name":"label","to_name":"text","type":"labels","origin":"prediction-changed"},{"value":{"start":14,"end":16,"text":"头疼","labels":["主诉_伴随症状_名称"]},"id":"a7672123435811efa066e1dc31897bd1","from_name":"label","to_name":"text","type":"labels","origin":"prediction"},{"value":{"start":16,"end":18,"text":"2月","labels":["主诉_伴随症状_时长"]},"id":"a7672124435811efa066e1dc31897bd1","from_name":"label","to_name":"text","type":"labels","origin":"prediction-changed"},{"from_id":"a7672120435811efa066e1dc31897bd1","to_id":"a7672121435811efa066e1dc31897bd1","type":"relation","direction":"right"},{"from_id":"a7672120435811efa066e1dc31897bd1","to_id":"a7672122435811efa066e1dc31897bd1","type":"relation","direction":"right"},{"from_id":"a7672123435811efa066e1dc31897bd1","to_id":"a7672124435811efa066e1dc31897bd1","type":"relation","direction":"right"}],"was_cancelled":false,"ground_truth":false,"created_at":"2024-07-16T09:56:23.477368Z","updated_at":"2024-07-16T11:44:00.105469Z","draft_created_at":"2024-07-16T09:56:03.060409Z","lead_time":511.985,"prediction":{},"result_count":0,"unique_id":"0351d80b-b220-4517-9482-a4384bafa4a9","import_id":null,"last_action":null,"task":2,"project":2,"updated_by":1,"parent_prediction":1,"parent_annotation":null,"last_created_by":null}],"file_upload":"499e6017-test.txt","drafts":[],"predictions":[1],"data":{"text":"间断胸闷21年，加重1周，伴头疼2月"},"meta":{},"created_at":"2024-07-16T08:11:21.850295Z","updated_at":"2024-07-16T11:44:00.128868Z"
,"inner_id":1,"total_annotations":1,"cancelled_annotations":0,"total_predictions":1,"comment_count":0,"unresolved_comment_count":0,"last_comment_updated_at":null,"project":2,"updated_by":1,"comment_authors":[]}]
"""

# Sample UIE JSONL record (the target output format, one JSON object per line):
"""
{"id": 2987, "text": "间断咳嗽、咳痰20余年，加重伴发热、气喘6天", "entities": [{"id": 75848, "label": "主诉_症状_总病程", "start_offset": 7, "end_offset": 11}, {"id": 75849, "label": "主诉_伴随症状_名称", "start_offset": 15, "end_offset": 17}, {"id": 75850, "label": "主诉_伴随症状_名称", "start_offset": 18, "end_offset": 20}, {"id": 75851, "label": "主诉_伴随症状_时长", "start_offset": 20, "end_offset": 22}, {"id": 111358, "label": "主诉_症状名称", "start_offset": 2, "end_offset": 4}, {"id": 111359, "label": "主诉_症状名称", "start_offset": 5, "end_offset": 7}, {"id": 111360, "label": "主诉_症状_本次发病时长", "start_offset": 20, "end_offset": 22}, {"id": 111363, "label": "主诉_症状_本次情况", "start_offset": 12, "end_offset": 14}, {"id": 111364, "label": "主诉_症状_描述", "start_offset": 0, "end_offset": 2}], "relations": [{"id": 4892, "from_id": 111358, "to_id": 111364, "type": "描述"}, {"id": 4893, "from_id": 111359, "to_id": 111364, "type": "描述"}, {"id": 4894, "from_id": 111358, "to_id": 75848, "type": "总病程"}, {"id": 4895, "from_id": 111359, "to_id": 75848, "type": "总病程"}, {"id": 4896, "from_id": 111358, "to_id": 111363, "type": "本次情况"}, {"id": 4897, "from_id": 111359, "to_id": 111363, "type": "本次情况"}, {"id": 4898, "from_id": 75850, "to_id": 75851, "type": "时长"}, {"id": 4899, "from_id": 75849, "to_id": 75851, "type": "时长"}, {"id": 4900, "from_id": 111359, "to_id": 111360, "type": "本次发病时长"}, {"id": 4901, "from_id": 111358, "to_id": 111360, "type": "本次发病时长"}], "Comments": []}
"""


def cvt_ls2uie(datas, *, data_id_start=300, entity_id_start=10000,
               relation_id_start=20000):
    """Convert Label Studio export tasks into UIE JSONL-style records.

    For every task, the ``result`` list of its first annotation is split into
    span entities and relations. Entities and relations receive fresh,
    monotonically increasing integer ids that keep counting across tasks.

    Args:
        datas: Iterable of Label Studio task dicts. Each task must provide
            ``data.text``; ``annotations[0].result`` holds the labelled
            regions and relations. A task without annotations is emitted
            with empty ``entities`` and ``relations`` lists.
        data_id_start: Id given to the first output record.
        entity_id_start: Id given to the first converted entity.
        relation_id_start: Id given to the first converted relation.

    Returns:
        A list of dicts with the keys ``id``, ``entities``, ``relations`` and
        ``text``. Each entity has ``id``, ``label``, ``start_offset`` and
        ``end_offset``; each relation has ``id``, ``from_id``, ``to_id`` and
        ``type``.

    Raises:
        KeyError: If a region lacks the expected fields, or a relation refers
            to a region id that is not present in the same annotation.
    """
    # The progress bar is purely cosmetic; fall back to plain iteration when
    # tqdm is unavailable so the conversion itself never depends on it.
    try:
        from tqdm import tqdm as progress
    except ImportError:
        def progress(iterable):
            return iterable

    data_id = data_id_start
    entity_id = entity_id_start
    relation_id = relation_id_start
    result = []

    for data in progress(datas):
        text = data['data']['text']
        annotations = data.get('annotations') or []
        items = (annotations[0].get('result') or []) if annotations else []

        entities = []
        label_of = {}   # new entity id -> entity label
        new_id_of = {}  # Label Studio region id -> new entity id

        # Pass 1: entities. Collect them all first so that relations resolve
        # regardless of where they appear in the result list.
        for item in items:
            if item.get('type') == 'relation':
                continue
            value = item['value']
            name = value['labels'][0]
            entities.append({
                "id": entity_id,
                "label": name,
                "start_offset": value['start'],
                "end_offset": value['end'],
            })
            label_of[entity_id] = name
            new_id_of[item['id']] = entity_id
            entity_id += 1

        # Pass 2: relations, identified by their type rather than by how many
        # keys the dict happens to have.
        relations = []
        for item in items:
            if item.get('type') != 'relation':
                continue
            head_id = new_id_of[item['from_id']]
            tail_id = new_id_of[item['to_id']]
            relations.append({
                "id": relation_id,
                "from_id": head_id,
                "to_id": tail_id,
                # The relation type is the tail entity's label.
                # NOTE(review): the UIE sample in this file uses a short name
                # (e.g. "总病程") instead of the full label — confirm which
                # form the consumer expects.
                "type": label_of[tail_id],
            })
            relation_id += 1

        result.append({
            'id': data_id,
            # Spelled "entities" to match the target UIE record format.
            'entities': entities,
            'relations': relations,
            'text': text,
        })
        data_id += 1

    return result

if __name__ == "__main__":
    # Local imports: only the script entry point needs them.
    import sys
    from pathlib import Path

    # Usage: python <this script> [label_studio_export.json [output.jsonl]]
    # With no arguments the original developer-machine export is converted.
    default_data_path = r'H:\work\文档\code\label_stduio\project-2-at-2024-07-16-12-01-c61a4919.json'
    data_path = sys.argv[1] if len(sys.argv) > 1 else default_data_path
    # Unless given explicitly, write next to the input with a .jsonl suffix.
    save_path = sys.argv[2] if len(sys.argv) > 2 else str(Path(data_path).with_suffix('.jsonl'))
    if Path(save_path) == Path(data_path):
        sys.exit("refusing to overwrite the input file; pass an explicit output path")

    datas = readjson(data_path)
    res = cvt_ls2uie(datas)
    writejsonl(res, save_path)
